Setup

library(data.table)
library(DBI)
library(ggplot2)
library(cowplot)
library(grid)
library(lme4)
library(lmerTest)

Sys.setlocale("LC_TIME", "en_US.UTF-8") # Print English date format
[1] "en_US.UTF-8"
en_US.UTF-8
# Sys.setlocale("LC_TIME", "nl_NL.UTF-8") # Print Dutch date format

number_format <- scales::number_format(big.mark = ",", decimal.mark = ".") # Print English number format
# number_format <- scales::number_format(big.mark = ".", decimal.mark = ",") # Print Dutch number format

theme_paper <- theme_classic(base_size = 12) + 
  theme(axis.text = element_text(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey92"))

School closure and opening dates

Sources: - https://www.rijksoverheid.nl/actueel/nieuws/2020/03/15/aanvullende-maatregelen-onderwijs-horeca-sport - https://www.rijksoverheid.nl/actueel/nieuws/2020/05/19/onderwijs-gaat-stap-voor-stap-open

date_schools_closed <- as.POSIXct("2020-03-16")
date_schools_opened <- as.POSIXct("2020-06-02")

Handle database connections

db_connect <- function() {
  db <- dbConnect(RSQLite::SQLite(), file.path("..", "data", "noordhoff.sqlite"))
  return(db)
}

db_disconnect <- function(db) {
  dbDisconnect(db)
}

Data

The database contains all SlimStampen data collected via Noordhoff’s platform in three courses: Stepping Stones (English), Grandes Lignes (French), and Neue Kontakte (German).

Trial-level response data are stored in the responses table. Book information, such as the course year, book title, and chapter, are stored in the book_info table.

responses

Column Type Explanation
date int UNIX time stamp [s]
user_id chr unique user identifier
method chr course
start_time int elapsed time since session start [ms]
rt int response time [ms]
duration int trial duration [ms]
fact_id int unique fact identifier (within chapter)
correct int response accuracy
answer chr user’s response
choices int number of answer choices (1 == open response)
backspace_used dbl user pressed backspace during trial
backspace_used_first dbl user erased first character of response
study int trial was a study trial
answer_language chr language of the answer
subsession int identifies part within learning session
book_info_id chr unique identifier of book information

book_info

Column Type Explanation
book_info_id chr unique identifier of book information
method_group chr year and edition
book_title chr book title (incl. year, level, edition)
book_type chr type of book
chapter chr chapter number and title

Preview first 10 rows

db <- db_connect()
responses_top <- dbGetQuery(db, "SELECT * FROM responses LIMIT 10")
responses_top
book_info_top <- dbGetQuery(db, "SELECT * FROM book_info LIMIT 10")
book_info_top
db_disconnect(db)

Performance

There are several measures of learning performance we can look at. The most straight-forward of these are response accuracy and response time.

Important factors to keep in mind: question type (multiple choice or open answer) and language. Note that we cannot distinguish between NL-X and X-X, since we only know the language of the answer.

Response accuracy

Whole population

db <- db_connect()
correct <- dbGetQuery(db, 
                      "SELECT r.method AS 'method',
                      DATE(r.date + 3600, 'unixepoch') AS 'doy',
                      r.user_id AS 'user',
                      r.choices > 1 AS 'mcq',
                      r.correct AS 'correct',
                      COUNT(*) AS 'n'
                      FROM 'responses' r
                      WHERE r.study == 0
                      GROUP BY r.method,
                      DATE(r.date + 3600, 'unixepoch'),
                      r.user_id,
                      r.choices > 1,
                      r.correct"
)
setDT(correct)
db_disconnect(db)

Fill in missing rows (where all trials on a day were correct/incorrect):

correct <- tidyr::complete(correct, tidyr::nesting(method, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct)
correct[, mcq := as.logical(mcq)]
accuracy <- correct[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(method, doy, user, mcq)]

Add a school year column (cutoff date: 1 August):

accuracy[, doy_posix := as.POSIXct(doy)]
accuracy[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]

Add sensible course names:

accuracy[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]

Align school years:

accuracy[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy[school_year == "19/20", doy_posix_aligned := doy_posix]

Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.

accuracy[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
accuracy_by_week_and_user <- accuracy[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, user, mcq)]
accuracy_by_week <- accuracy_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, mcq)]
# accuracy_by_week_and_user <- accuracy[, .(accuracy = mean(accuracy, na.rm = TRUE)), by = .(course, school_year, doy_posix_aligned_week, user, mcq)]
# accuracy_by_week <- accuracy_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
#                               accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, mcq)]

Add question type labels:

accuracy_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]

Plot response accuracy by week (mean +/- 1 SE).

p_acc <- ggplot(accuracy_by_week[(course == "English" & mcq == TRUE) | course == "French",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(. ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format(accuracy = 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper
p_acc
Warning: Removed 24 row(s) containing missing values (geom_path).

ggsave("../output/acc_by_question_type.pdf", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type.png", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).

By level and year

db <- db_connect()
correct_strat <- dbGetQuery(db, 
                      "SELECT r.method AS 'method',
                      r.book_info_id AS 'book_info_id',
                      DATE(r.date + 3600, 'unixepoch') AS 'doy',
                      r.user_id AS 'user',
                      r.choices > 1 AS 'mcq',
                      r.correct AS 'correct',
                      COUNT(*) AS 'n'
                      FROM 'responses' r
                      WHERE r.study == 0
                      GROUP BY r.method,
                      r.book_info_id,
                      DATE(r.date + 3600, 'unixepoch'),
                      r.user_id,
                      r.choices > 1,
                      r.correct"
)
setDT(correct_strat)
db_disconnect(db)

Fill in missing rows (where all trials on a day were correct/incorrect):

correct_strat <- tidyr::complete(correct_strat, tidyr::nesting(method, book_info_id, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct_strat)
correct_strat[, mcq := as.logical(mcq)]

Add book information:

db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)

setDT(book_info)
correct_strat[book_info[book_type == "Hoofdboek",], on  = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]

Add sensible course names:

correct_strat[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]

Add a school year column (cutoff date: 1 August):

correct_strat[, doy_posix := as.POSIXct(doy)]
correct_strat[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]

Simplify level names:

# Keep all distinctions
correct_strat[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
correct_strat[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
correct_strat[, level := dplyr::case_when(
  grepl( "hv", book_title) ~ "General secondary\n(havo)",
  grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
  grepl("havo", book_title) ~ "General secondary\n(havo)",
  grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
  TRUE ~ "Other")]
correct_strat[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]

Simplify year names:

correct_strat[, year := dplyr::case_when(
  method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
  method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
  method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
  method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
  method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
  method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
  TRUE ~ "Other")]

Consolidate by day:

accuracy_strat <- correct_strat[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(school_year, doy_posix, course, level, year, user, mcq)]

Align school years:

accuracy_strat[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy_strat[school_year == "19/20", doy_posix_aligned := doy_posix]

Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.

accuracy_strat[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy_strat[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
accuracy_strat_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, year, user, mcq)]
accuracy_strat_by_week <- accuracy_strat_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, year, mcq)]

Add question type labels:

accuracy_strat_by_week_and_user[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
accuracy_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]

How many unique users per group?

accuracy_strat_by_week_and_user[, .(unique_users = length(unique(user))),  by = .(course, level, year, school_year, question_type)]

Plot response accuracy by week (mean +/- 1 SE).

p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "French" & level != "Other",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.4, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_level_year
Warning: Removed 11 row(s) containing missing values (geom_path).

ggsave("../output/acc_by_question_type_french_level_year.pdf", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_french_level_year.png", width = 9, height = 5)
Warning: Removed 11 row(s) containing missing values (geom_path).
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "English" & level != "Other" & question_type == "Multiple\nchoice",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line() +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.4, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_level_year
Warning: Removed 5 row(s) containing missing values (geom_path).

ggsave("../output/acc_by_question_type_english_level_year.pdf", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_english_level_year.png", width = 9, height = 5)
Warning: Removed 5 row(s) containing missing values (geom_path).

By level

accuracy_level_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, user, mcq)]

accuracy_level_by_week <- accuracy_level_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, mcq)]

Add question type labels:

accuracy_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]

How many users in each group?

accuracy_level_by_week_and_user[, .(unique_users = length(unique(user))),  by = .(course, level, school_year, mcq)]

Plot response accuracy by week (mean +/- 1 SE).

p_acc_level <- ggplot(accuracy_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.6, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_level
Warning: Removed 20 row(s) containing missing values (geom_path).

ggsave("../output/acc_by_question_type_level.pdf", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_level.png", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).

By year

accuracy_year_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, year, user, mcq)]

accuracy_year_by_week <- accuracy_year_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, year, mcq)]

Add question type labels:

accuracy_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]

How many users in each group?

accuracy_year_by_week_and_user[, .(unique_users = length(unique(user))),  by = .(course, year, school_year, mcq)]

Plot response accuracy by week (mean +/- 1 SE).

p_acc_year <- ggplot(accuracy_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(year ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.6, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_year
Warning: Removed 14 row(s) containing missing values (geom_path).

ggsave("../output/acc_by_question_type_year.pdf", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/acc_by_question_type_year.png", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).

Regression model

Fit a mixed effects model to the daily accuracy data:

accuracy[, period := dplyr::case_when(
  doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
  doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
  doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]
# Reorder factor levels so that intercept is pre-lockdown open answer in 19/20
accuracy[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
accuracy[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
accuracy[, mcq := factor(mcq, levels = c(FALSE, TRUE))]

Since we know the number of trials per day and the proportion correct (accuracy), we can use a binomial GLMM:

if(!file.exists("../output/m_acc_fit.rds")) {
  m_acc <- glmer(accuracy ~ period*school_year*mcq + (1 | user) + (1 | course),
                 data = accuracy[(course == "English" & mcq == TRUE) | course == "French",],
                 family = "binomial", 
                 weights = n,
                 nAGQ = 0,
                 control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
  saveRDS(m_acc, "../output/m_acc_fit.rds")
} else {
  m_acc <- readRDS("../output/m_acc_fit.rds")
}

m_acc_summary <- summary(m_acc)
m_acc_summary
Generalized linear mixed model fit by maximum likelihood (Adaptive
  Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
 Family: binomial  ( logit )
Formula: accuracy ~ period * school_year * mcq + (1 | user) + (1 | course)
   Data: 
accuracy[(course == "English" & mcq == TRUE) | course == "French",      ]
Weights: n
Control: 
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))

     AIC      BIC   logLik deviance df.resid 
 4963073  4963232 -2481522  4963045   671406 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-23.9963  -0.9227   0.1606   1.0936  11.4371 

Random effects:
 Groups Name        Variance Std.Dev.
 user   (Intercept) 0.35034  0.5919  
 course (Intercept) 0.05511  0.2348  
Number of obs: 671420, groups:  user, 133419; course, 2

Fixed effects:
                                                Estimate Std. Error
(Intercept)                                     1.631257   0.166016
periodduring-lockdown                           0.259227   0.002725
periodpost-lockdown                             0.150384   0.004021
school_year18/19                                0.117385   0.002566
mcqTRUE                                         0.960249   0.002088
periodduring-lockdown:school_year18/19         -0.311338   0.004409
periodpost-lockdown:school_year18/19           -0.242816   0.006325
periodduring-lockdown:mcqTRUE                  -0.328352   0.002881
periodpost-lockdown:mcqTRUE                    -0.277485   0.004397
school_year18/19:mcqTRUE                        0.003357   0.002667
periodduring-lockdown:school_year18/19:mcqTRUE  0.379504   0.004758
periodpost-lockdown:school_year18/19:mcqTRUE    0.335859   0.006970
                                                z value Pr(>|z|)    
(Intercept)                                       9.826   <2e-16 ***
periodduring-lockdown                            95.142   <2e-16 ***
periodpost-lockdown                              37.399   <2e-16 ***
school_year18/19                                 45.755   <2e-16 ***
mcqTRUE                                         459.903   <2e-16 ***
periodduring-lockdown:school_year18/19          -70.613   <2e-16 ***
periodpost-lockdown:school_year18/19            -38.387   <2e-16 ***
periodduring-lockdown:mcqTRUE                  -113.954   <2e-16 ***
periodpost-lockdown:mcqTRUE                     -63.110   <2e-16 ***
school_year18/19:mcqTRUE                          1.259    0.208    
periodduring-lockdown:school_year18/19:mcqTRUE   79.765   <2e-16 ***
periodpost-lockdown:school_year18/19:mcqTRUE     48.183   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
                  (Intr) prddr- prdps- sc_18/19 mcTRUE prdd-:_18/19
prddrng-lck       -0.007                                           
prdpst-lckd       -0.005  0.346                                    
schl_y18/19       -0.008  0.452  0.297                             
mcqTRUE           -0.010  0.534  0.352  0.592                      
prdd-:_18/19       0.005 -0.626 -0.217 -0.505   -0.340             
prdp-:_18/19       0.003 -0.229 -0.640 -0.351   -0.233  0.241      
prddr-:TRUE        0.007 -0.824 -0.278 -0.412   -0.649  0.516      
prdps-:TRUE        0.004 -0.269 -0.824 -0.264   -0.419  0.169      
s_18/19:TRU        0.007 -0.409 -0.270 -0.837   -0.697  0.450      
prdd-:_18/19:TRUE -0.004  0.506  0.171  0.439    0.405 -0.854      
prdp-:_18/19:TRUE -0.003  0.176  0.523  0.298    0.273 -0.190      
                  prdp-:_18/19 prdd-:TRUE prdp-:TRUE s_18/19:
prddrng-lck                                                  
prdpst-lckd                                                  
schl_y18/19                                                  
mcqTRUE                                                      
prdd-:_18/19                                                 
prdp-:_18/19                                                 
prddr-:TRUE        0.183                                     
prdps-:TRUE        0.527        0.315                        
s_18/19:TRU        0.311        0.481      0.310             
prdd-:_18/19:TRUE -0.194       -0.612     -0.194     -0.523  
prdp-:_18/19:TRUE -0.843       -0.205     -0.634     -0.358  
                  prdd-:_18/19:TRUE
prddrng-lck                        
prdpst-lckd                        
schl_y18/19                        
mcqTRUE                            
prdd-:_18/19                       
prdp-:_18/19                       
prddr-:TRUE                        
prdps-:TRUE                        
s_18/19:TRU                        
prdd-:_18/19:TRUE                  
prdp-:_18/19:TRUE  0.221           

Save coefficients as a table for in the paper:

m_acc_coef <- as.data.frame(m_acc_summary$coefficients)
setDT(m_acc_coef, keep.rownames = TRUE)
m_acc_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
                   "Period: lockdown",
                   "Period: post-lockdown",
                   "School year: 18/19",
                   "Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19",
                   "Period: post-lockdown $\\times$ School year: 18/19",
                   "Period: lockdown $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ Question type: multiple choice",
                   "School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")

# Format p-values
m_acc_coef$`Pr(>|z|)` <- format.pval(m_acc_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_acc_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_acc_coef$`Pr(>|z|)`) # Remove leading zero

cat(knitr::kable(m_acc_coef,
                 align = c("l","r", "r", "r", "r"),
                 digits = c(NA, 3, 3, 2, NA),
                 col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
                 format = "latex",
                 booktabs = TRUE,
                 escape = FALSE),
    file = "../output/m_acc_table.tex")

Visualise the model fit:

acc_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
acc_fit <- cbind(acc_fit, accuracy = predict(m_acc, type = "response", re.form = NA, newdata = acc_fit))
acc_fit
ggplot(acc_fit, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
  theme_paper

Empirical means:

accuracy_mean <- accuracy[(course == "English" & mcq == TRUE) | course == "French", .(accuracy = sum(accuracy * n)/sum(n)), by = .(period, school_year, mcq, user, course)][, .(accuracy = mean(accuracy), accuracy_sd = sd(accuracy)), by = .(period, school_year, mcq)]
accuracy_mean
ggplot(accuracy_mean, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
  theme_paper

Response time

db <- db_connect()
rt <- dbGetQuery(db, 
                 "SELECT r.method AS 'method',
                  r.book_info_id AS 'book_info_id',
                  DATE(r.date + 3600, 'unixepoch') AS 'doy',
                  r.user_id AS 'user',
                  r.choices > 1 AS 'mcq',
                  r.rt AS 'rt'
                  FROM 'responses' r
                  WHERE r.study == 0
                  AND r.correct == 1
                 "
)
setDT(rt)
db_disconnect(db)
doys <- rt[, .(doy = unique(doy))][, doy_posix := as.POSIXct(doy)][]
doys[, doy_posix_week := cut.POSIXt(as.POSIXct(doy), "week")]
doys[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
doys[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
doys[school_year == "19/20", doy_posix_aligned := doy_posix]
doys[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
doys[, period := dplyr::case_when(
  doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
  doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
  doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]
# Reorder factor levels so that intercept is pre-lockdown in 19/20
doys[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
doys[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
rt <- rt[doys, on = "doy"]
rt[, mcq := as.factor(as.logical(mcq))]
rt[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]

Throw out trials with negative RTs (timing errors)

rt <- rt[rt > 0]

Whole population

rt_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, doy_posix_week)]

rt_by_week <- rt_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, doy_posix_week)]

Overlap the two school years:

rt_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]

Add question type labels:

rt_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]

Plot response time by week (mean +/- 1 SE).

p_rt <- ggplot(rt_by_week[(course == "English" & mcq == TRUE) | course == "French",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(. ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1.7, 3.7)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt
Warning: Removed 24 row(s) containing missing values (geom_path).

ggsave("../output/rt_by_question_type.pdf", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type.png", width = 9, height = 3)
Warning: Removed 24 row(s) containing missing values (geom_path).

By level and year

Add book info:

rt[book_info[book_type == "Hoofdboek",], on  = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]

Simplify level names:

# Keep all distinctions
rt[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
rt[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
rt[, level := dplyr::case_when(
  grepl( "hv", book_title) ~ "General secondary\n(havo)",
  grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
  grepl("havo", book_title) ~ "General secondary\n(havo)",
  grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
  TRUE ~ "Other")]
rt[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]

Simplify year names:

rt[, year := dplyr::case_when(
  method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
  method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
  method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
  method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
  method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
  method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
  TRUE ~ "Other")]
rt_strat_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, year, doy_posix_week)]

rt_strat_by_week <- rt_strat_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, year, doy_posix_week)]

Overlap the two school years:

rt_strat_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_strat_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]

Add question type labels:

rt_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_strat_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]

Plot response time by week (mean +/- 1 SE).

p_rt_level_year <- ggplot(rt_strat_by_week[course == "French",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 4)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_level_year
Warning: Removed 11 row(s) containing missing values (geom_path).

ggsave("../output/rt_by_question_type_french_level_year.pdf", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_french_level_year.png", width = 9, height = 3)
Warning: Removed 11 row(s) containing missing values (geom_path).
p_rt_level_year <- ggplot(rt_strat_by_week[course == "English" & question_type == "Multiple\nchoice" & level != "Other",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 4)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_level_year
Warning: Removed 5 row(s) containing missing values (geom_path).

ggsave("../output/rt_by_question_type_english_level_year.pdf", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_english_level_year.png", width = 9, height = 3)
Warning: Removed 5 row(s) containing missing values (geom_path).

By level

rt_level_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, doy_posix_week)]

rt_level_by_week <- rt_level_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, doy_posix_week)]

Overlap the two school years:

rt_level_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_level_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]

Add question type labels:

rt_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_level_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]

Plot response time by week (mean +/- 1 SE).

p_rt_level <- ggplot(rt_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 6)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_level
Warning: Removed 20 row(s) containing missing values (geom_path).

ggsave("../output/rt_by_question_type_level.pdf", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_level.png", width = 9, height = 5)
Warning: Removed 20 row(s) containing missing values (geom_path).

By year

rt_year_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, year, doy_posix_week)]

rt_year_by_week <- rt_year_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, year, doy_posix_week)]

Overlap the two school years:

rt_year_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_year_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]

Add question type labels:

rt_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
rt_year_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]

Plot response time by week (mean +/- 1 SE).

p_rt_year <- ggplot(rt_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(year ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 4)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_year
Warning: Removed 14 row(s) containing missing values (geom_path).

ggsave("../output/rt_by_question_type_year.pdf", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).
ggsave("../output/rt_by_question_type_year.png", width = 9, height = 5)
Warning: Removed 14 row(s) containing missing values (geom_path).

Regression model

rt_model_dat <- rt[, .(rt_median = median(rt)), by = .(course, school_year, period, doy_posix, mcq, user)]

Fit a generalised linear mixed effects model (assuming a Gamma distribution for RT and an identity link function; Lo & Andrew, 2015) to the daily median RT:

if(!file.exists("../output/m_rt_fit.rds")) {
  m_rt <- glmer(rt_median ~ period*school_year*mcq + (1 | user) + (1 | course),
                 data = rt_model_dat[(course == "English" & mcq == TRUE) | course == "French",],
                 family = Gamma(link = "identity"),
                 nAGQ = 0,
                 control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
  saveRDS(m_rt, "../output/m_rt_fit.rds")
} else {
  m_rt <- readRDS("../output/m_rt_fit.rds")
}

m_rt_summary <- summary(m_rt)
m_rt_summary
Generalized linear mixed model fit by maximum likelihood (Adaptive
  Gauss-Hermite Quadrature, nAGQ = 0) [glmerMod]
 Family: Gamma  ( identity )
Formula: 
rt_median ~ period * school_year * mcq + (1 | user) + (1 | course)
   Data: rt_model_dat[(course == "English" & mcq == TRUE) | course ==  
    "French", ]
Control: 
glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))

     AIC      BIC   logLik deviance df.resid 
10332190 10332361 -5166080 10332160   669366 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
 -2.732  -0.345  -0.070   0.241 220.533 

Random effects:
 Groups   Name        Variance  Std.Dev.
 user     (Intercept) 2.828e+05 531.8041
 course   (Intercept) 2.852e+02  16.8883
 Residual             1.324e-01   0.3639
Number of obs: 669381, groups:  user, 133398; course, 2

Fixed effects:
                                               Estimate Std. Error t value
(Intercept)                                    2133.210     13.019 163.848
periodduring-lockdown                           207.475      6.937  29.909
periodpost-lockdown                             198.282     10.619  18.673
school_year18/19                                 28.172      6.564   4.292
mcqTRUE                                         150.852      5.382  28.028
periodduring-lockdown:school_year18/19         -280.435     11.362 -24.681
periodpost-lockdown:school_year18/19           -294.190     17.057 -17.247
periodduring-lockdown:mcqTRUE                  -210.118      7.464 -28.151
periodpost-lockdown:mcqTRUE                    -199.308     11.721 -17.005
school_year18/19:mcqTRUE                        -62.059      6.894  -9.002
periodduring-lockdown:school_year18/19:mcqTRUE  278.331     12.257  22.708
periodpost-lockdown:school_year18/19:mcqTRUE    315.189     18.735  16.824
                                               Pr(>|z|)    
(Intercept)                                     < 2e-16 ***
periodduring-lockdown                           < 2e-16 ***
periodpost-lockdown                             < 2e-16 ***
school_year18/19                               1.77e-05 ***
mcqTRUE                                         < 2e-16 ***
periodduring-lockdown:school_year18/19          < 2e-16 ***
periodpost-lockdown:school_year18/19            < 2e-16 ***
periodduring-lockdown:mcqTRUE                   < 2e-16 ***
periodpost-lockdown:mcqTRUE                     < 2e-16 ***
school_year18/19:mcqTRUE                        < 2e-16 ***
periodduring-lockdown:school_year18/19:mcqTRUE  < 2e-16 ***
periodpost-lockdown:school_year18/19:mcqTRUE    < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
                  (Intr) prddr- prdps- sc_18/19 mcTRUE prdd-:_18/19
prddrng-lck       -0.251                                           
prdpst-lckd       -0.161  0.334                                    
schl_y18/19       -0.277  0.483  0.311                             
mcqTRUE           -0.335  0.567  0.368  0.619                      
prdd-:_18/19       0.159 -0.617 -0.207 -0.533   -0.352             
prdp-:_18/19       0.105 -0.214 -0.626 -0.356   -0.234  0.229      
prddr-:TRUE        0.226 -0.861 -0.281 -0.434   -0.651  0.530      
prdps-:TRUE        0.143 -0.275 -0.854 -0.275   -0.413  0.170      
s_18/19:TRU        0.245 -0.440 -0.284 -0.879   -0.703  0.483      
prdd-:_18/19:TRUE -0.141  0.529  0.173  0.474    0.403 -0.882      
prdp-:_18/19:TRUE -0.092  0.176  0.537  0.311    0.263 -0.191      
                  prdp-:_18/19 prdd-:TRUE prdp-:TRUE s_18/19:
prddrng-lck                                                  
prdpst-lckd                                                  
schl_y18/19                                                  
mcqTRUE                                                      
prdd-:_18/19                                                 
prdp-:_18/19                                                 
prddr-:TRUE        0.179                                     
prdps-:TRUE        0.534        0.305                        
s_18/19:TRU        0.322        0.490      0.310             
prdd-:_18/19:TRUE -0.195       -0.613     -0.187     -0.539  
prdp-:_18/19:TRUE -0.872       -0.194     -0.627     -0.354  
                  prdd-:_18/19:TRUE
prddrng-lck                        
prdpst-lckd                        
schl_y18/19                        
mcqTRUE                            
prdd-:_18/19                       
prdp-:_18/19                       
prddr-:TRUE                        
prdps-:TRUE                        
s_18/19:TRU                        
prdd-:_18/19:TRUE                  
prdp-:_18/19:TRUE  0.212           

Save coefficients as a table for in the paper:

m_rt_coef <- as.data.frame(m_rt_summary$coefficients)
setDT(m_rt_coef, keep.rownames = TRUE)
m_rt_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
                   "Period: lockdown",
                   "Period: post-lockdown",
                   "School year: 18/19",
                   "Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19",
                   "Period: post-lockdown $\\times$ School year: 18/19",
                   "Period: lockdown $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ Question type: multiple choice",
                   "School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")

# Format p-values
m_rt_coef$`Pr(>|z|)` <- format.pval(m_rt_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_rt_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_rt_coef$`Pr(>|z|)`) # Remove leading zero

cat(knitr::kable(m_rt_coef,
                 align = c("l","r", "r", "r", "r"),
                 digits = c(NA, 3, 3, 2, NA),
                 col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
                 format = "latex",
                 booktabs = TRUE,
                 escape = FALSE),
    file = "../output/m_rt_table.tex")

Visualise the model fit:

rt_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
rt_fit <- cbind(rt_fit, rt = predict(m_rt, type = "response", re.form = NA, newdata = rt_fit))
rt_fit
ggplot(rt_fit, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(1500, 4000)) +
  theme_paper

Empirical means:

rt_mean <- rt_model_dat[(course == "English" & mcq == TRUE) | course == "French", .(rt = mean(rt_median)), by = .(period, school_year, mcq, user, course)][, .(rt = mean(rt), rt_sd = sd(rt)), by = .(period, school_year, mcq, course)]
rt_mean[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
rt_mean
ggplot(rt_mean, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  facet_grid(~ course) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(1500, 3000)) +
  theme_paper

Combination plot

p_legend <- get_legend(p_acc)
Warning: Removed 24 row(s) containing missing values (geom_path).
p_acc <- p_acc +
  guides(colour = FALSE, fill = FALSE, lty = FALSE)

p_rt <- p_rt +
  guides(colour = FALSE, fill = FALSE, lty = FALSE)

Combine plots:

plot_grid(
  plot_grid(p_acc, p_rt,
            ncol = 1,
            labels = c("A", "B")),
  p_legend,
  rel_widths = c(1, .2)
)
Warning: Removed 24 row(s) containing missing values (geom_path).

Warning: Removed 24 row(s) containing missing values (geom_path).

ggsave("../output/combi_acc_rt.pdf", width = 9, height = 3.5)
ggsave("../output/combi_acc_rt.png", width = 9, height = 3.5)

Learning progress

Get the unique book chapter IDs on each day:

db <- db_connect()

progress <- dbGetQuery(db,
                       "SELECT DISTINCT r.book_info_id AS 'book_info_id',
                        r.method AS 'method',
                        DATE(r.date + 3600, 'unixepoch') AS 'doy',
                        COUNT(*) AS 'trials'
                        FROM 'responses' r
                        GROUP BY r.method,
                        r.book_info_id,
                        DATE(r.date + 3600, 'unixepoch');"
                       )

db_disconnect(db)

setDT(progress)

Join with the book chapter information:

db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)

setDT(book_info)
progress[book_info[book_type == "Hoofdboek",], on  = "book_info_id", c("method_group", "book_title", "chapter") := .(i.method_group, i.book_title, i.chapter)]

Add sensible course names:

progress[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]

Add a school year column (cutoff date: 1 August):

progress[, doy_posix := as.POSIXct(doy)]
progress[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]

Consolidate count by day and chapter:

progress_by_day <- progress[, .(trials = sum(trials)), by = .(school_year, doy_posix, course, method_group, book_title, chapter)]

Simplify level names:

# Keep all distinctions
progress_by_day[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
progress_by_day[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]
# Simplify to three levels
progress_by_day[, level := dplyr::case_when(
  grepl( "hv", book_title) ~ "General secondary (havo)",
  grepl("vmbo", book_title) ~ "Pre-vocational (vmbo)",
  grepl("havo", book_title) ~ "General secondary (havo)",
  grepl("vwo", book_title) ~ "Pre-university (vwo)",
  TRUE ~ "Other")]
progress_by_day[, level := factor(level, levels = c("Other", "Pre-vocational (vmbo)", "General secondary (havo)", "Pre-university (vwo)"))]

Simplify year names:

progress_by_day[, year := dplyr::case_when(
  method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
  method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
  method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
  method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
  method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
  method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
  TRUE ~ "Other")]

Simplify chapter names:

# In most cases, the chapter name starts with a number
progress_by_day[, chapter_simple := factor(as.numeric(stringr::str_extract(chapter, "^\\d{1,2}")))]
# Remaining cases:
unique(progress_by_day[is.na(chapter_simple),]$chapter)
 [1] "BS2 Dienstleistung"               
 [2] "BS5 Reisen"                       
 [3] "BS1 Familie und Beziehungen"      
 [4] "BS3 Dienstleistung"               
 [5] "BS4 Reisen und Verkehr"           
 [6] "Lernliste Brückenschlag"          
 [7] "BS2 Freizeit"                     
 [8] "BS1 Schule und Ausbildung"        
 [9] "Bridging the Gap Year 2"          
[10] "Bridging the Gap Year 1"          
[11] "Bridging the Gap Exam Preparation"
[12] "Exam Preparation"                 
[13] "Bridging the Gap mbo"             
[14] "Bridging the Gap havo"            
BS2 Dienstleistung

BS5 Reisen

BS1 Familie und Beziehungen

BS3 Dienstleistung

BS4 Reisen und Verkehr

Lernliste Brückenschlag

BS2 Freizeit

BS1 Schule und Ausbildung

Bridging the Gap Year 2

Bridging the Gap Year 1

Bridging the Gap Exam Preparation

Exam Preparation

Bridging the Gap mbo

Bridging the Gap havo
# Combine these chapters into an "other" category
progress_by_day[is.na(chapter_simple), chapter_simple := "O"]

Align school years:

progress_by_day[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
progress_by_day[school_year == "19/20", doy_posix_aligned := doy_posix]

Use cut.Date() to bin dates by week and month. Each day is assigned the date of the most recent Monday.

progress_by_day[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
progress_by_day[, doy_posix_aligned_month := cut.POSIXt(doy_posix_aligned, "month")]

Calculate proportions by week and month:

progress_by_week <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_week, course, level, year, chapter_simple)]
progress_by_week[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_week, course, level, year)]
progress_by_month <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_month, course, level, year, chapter_simple)]
progress_by_month[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_month, course, level, year)]
setorder(progress_by_week, chapter_simple)
setorder(progress_by_month, chapter_simple)

French

p_french_y1 <- ggplot(progress_by_week[course == "French" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_french_y2 <- ggplot(progress_by_week[course == "French" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_french_y3 <- ggplot(progress_by_week[course == "French" & year == "Year 3/4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_progress_french <- plot_grid(p_french_y1, p_french_y2, p_french_y3, 
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c("Year 1", "Year 2", "Year 3/4"),
          hjust = -0.1,
          scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 47 rows containing missing values (position_stack).
Warning: Removed 42 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
p_progress_french

ggsave("../output/progress_french.pdf", width = 9, height = 9)
ggsave("../output/progress_french.png", width = 9, height = 6)

Did the share of trials change between school years? We can simplify the analysis by aggregating over the whole lockdown period.

progress_lockdown <- progress_by_day[between(doy_posix_aligned, date_schools_closed, date_schools_opened), .(trials = sum(trials)), by = .(school_year, course, level, year, chapter_simple)]

# Fill in missing rows (occurs when chapter was only studied in one of the two years)
progress_lockdown <- as.data.table(tidyr::complete(progress_lockdown, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0))) 
  
progress_lockdown[, prop := trials/sum(trials), by = .(school_year, course, level, year)]
setorder(progress_lockdown, chapter_simple)
ggplot(progress_lockdown[course == "French"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(level ~ year) +
  geom_col(colour = NA) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter",
       title = "French") +
  theme_paper

Perform a chi-square test of homogeneity to determine whether school years are significantly different.

for (y in sort(unique(progress_lockdown$year))) {
  for (l in levels(progress_lockdown$level)) {
    d <- progress_lockdown[course == "French" & year == y & level == l]
    if (nrow(d) > 0) {
      print(paste("French", y, l, collapse= " "))
      print(
        chisq.test(
          dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
        )
      )
    }
  } 
}
[1] "French Year 1 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 27254, df = 5, p-value < 2.2e-16

[1] "French Year 1 General secondary (havo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 218921, df = 5, p-value < 2.2e-16

[1] "French Year 1 Pre-university (vwo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 38951, df = 5, p-value < 2.2e-16

[1] "French Year 2 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 12940, df = 5, p-value < 2.2e-16

[1] "French Year 2 General secondary (havo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 50301, df = 5, p-value < 2.2e-16

[1] "French Year 2 Pre-university (vwo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 44901, df = 5, p-value < 2.2e-16

[1] "French Year 3/4 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 30099, df = 7, p-value < 2.2e-16

[1] "French Year 3/4 General secondary (havo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 22949, df = 5, p-value < 2.2e-16

[1] "French Year 3/4 Pre-university (vwo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 21144, df = 5, p-value < 2.2e-16

Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).

Visualise the change between school years:

progress_lockdown[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(x = chapter_simple, y = (prop_change * 100), colour = chapter_simple, group = school_year)) +
  facet_grid(level ~ year, scales = "free_x") +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(xend = chapter_simple), yend = 0) +
  geom_point() +
  scale_y_continuous(limits = c(-25, 25)) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = "Chapter",
       y = "Change in trial share\n(percentage points)",
       title = "French") +
  theme_paper

Are these changes really important? We may expect a certain amount of fluctuation between any pair of school years. We don’t have data from before the 18/19 school year, but we can look at how the magnitude of changes during the lockdown period compares to changes earlier in the school year.

To keep things as comparable as possible, use a sliding time window with the same size as the lockdown period:

time_window <- as.numeric(round(date_schools_opened - date_schools_closed))
time_window
[1] 78
date_range <- sort(unique(progress_by_day$doy_posix_aligned))
date_range <- date_range[date_range < date_schools_closed]

prop_change_window <- data.table()

for (i in 1:(length(date_range) - as.numeric(time_window))) {
  d <- date_range[i:(i + time_window - 1)]
  progress_window <- progress_by_day[course %in% c("French", "English") & doy_posix_aligned %in% d,
                                     .(trials = sum(trials)),
                                     by = .(school_year, course, level, year, chapter_simple)]
  
  # Fill in missing rows (occurs when chapter was only studied in one of the two years)
  progress_window <- as.data.table(tidyr::complete(progress_window, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0))) 
  
  progress_window[, prop := trials/sum(trials), by = .(school_year, course, level, year)]

  progress_window[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
  
  prop_change_window <- rbind(prop_change_window, progress_window[school_year == "19/20",][,window := i][,date_min := min(d)][,date_max := max(d)])
}

The density of year-to-year changes can be visualised by time window:

ggplot(prop_change_window, aes(x = prop_change * 100, y = window, group = window)) +
  ggridges::geom_density_ridges(alpha = 0.1, scale = 25, fill = NA) +
  labs(x = "Change in trial share\n(percentage points)",
         y = "Time window") +
  theme_paper
Picking joint bandwidth of 0.583

Compare the aggregated density to the changes during the lockdown period:

prop_change_combined <- rbind(prop_change_window[, period := "Pre-lockdown"], progress_lockdown[course %in% c("French", "English") & school_year == "19/20", period := "Lockdown"], fill = TRUE)
prop_change_combined[, period := factor(period, levels = c("Pre-lockdown", "Lockdown"))]
ggplot(prop_change_combined, aes(x = prop_change, colour = period)) +
  geom_density() +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = "Change in trial share\n(percentage points)",
       y = "Density",
       colour = NULL) +
  theme_paper

prop_change_sd <- prop_change_window[, .(sd = sd(prop_change) * 100), by = .(course, year, level)]

Add boundaries based on the typical spread to the change plot:

p_change_french <- ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(colour = chapter_simple)) +
  facet_grid(year ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
  scale_y_continuous(breaks = c(-20, 0, 20)) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = "Chapter",
       y = "Change in trial share\n(percentage points)") +
  theme_paper +
  theme(panel.grid.major.y = element_blank())

p_change_french

ggsave("../output/progress_change_french.pdf", width = 5, height = 4)
ggsave("../output/progress_change_french.png", width = 9, height = 3)

Make a combination plot for in the paper:

plot_grid(p_french_y1, p_french_y2, p_french_y3, p_change_french,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c("Year 1", "Year 2", "Year 3/4", "Change"),
          rel_heights = c(1, 1, 1, 1.5),
          hjust = -0.1,
          scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 47 rows containing missing values (position_stack).
Warning: Removed 42 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).

ggsave("../output/progress_combi_french.pdf", width = 9, height = 9)
ggsave("../output/progress_combi_french.png", width = 9, height = 9)
p_change_french_y1 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 1"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_french_y2 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 2"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-20, 0, 20),  limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_french_y3 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 3/4"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-20, 0, 20),  limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

filler_plot <- qplot() + 
  theme_nothing() + 
  theme(panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

plot_grid(
          filler_plot,
          p_french_y1, filler_plot, p_change_french_y1, filler_plot,
          p_french_y2, filler_plot, p_change_french_y2, filler_plot, 
          p_french_y3, filler_plot, p_change_french_y3,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c(NA,
                     "Year 1", NA, NA, NA,
                     "Year 2", NA, NA, NA,
                     "Year 3/4", NA, NA),
          rel_heights = c(.1, 
                          1, -.2, .75, .1,
                          1, -.2, .75, .1,
                          1, -.2, .75),
          hjust = -0.1,
          vjust = -0.1,
          scale = .95)
Warning: Removed 48 rows containing missing values (position_stack).
Warning: Removed 41 rows containing missing values (geom_col).
Warning: Removed 47 rows containing missing values (position_stack).
Warning: Removed 42 rows containing missing values (geom_col).
Warning: Removed 24 rows containing missing values (position_stack).
Warning: Removed 23 rows containing missing values (geom_col).
Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

ggsave("../output/progress_combi_alt_french.pdf", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_french.png", width = 9, height = 9)
Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

English

NOTE: chapters without a number (Bridging the Gap, Exam Preparation) are shown as “O” in the plot. They don’t seem to fit neatly in the chapter sequence, so I’m grouping them together.

p_english_y1 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_english_y2 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_english_y3 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 3"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_english_y4 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_progress_english <- plot_grid(p_english_y1, p_english_y2, p_english_y3, p_english_y4,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c("Year 1", "Year 2", "Year 3", "Year 4"),
          hjust = -0.1,
          scale = .95)
Warning: Removed 61 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 71 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 49 rows containing missing values (position_stack).
Warning: Removed 50 rows containing missing values (geom_col).
Warning: Removed 7 rows containing missing values (position_stack).
Warning: Removed 4 rows containing missing values (geom_col).
p_progress_english

ggsave("../output/progress_english.pdf", width = 9, height = 9)
ggsave("../output/progress_english.png", width = 9, height = 9)

Did the share of trials change between school years?

ggplot(progress_lockdown[course == "English" & level != "Other"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(level ~ year) +
  geom_col(colour = NA) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter",
       title = "English") +
  theme_paper

Change between school years:

p_change_english <- ggplot(progress_lockdown[school_year == "19/20" & course == "English" & level != "Other"], aes(colour = chapter_simple)) +
  facet_grid(year ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = "Chapter",
       y = "Change in trial share\n(percentage points)") +
  theme_paper +
  theme(panel.grid.major.y = element_blank())

p_change_english 

ggsave("../output/progress_change_english.pdf", width = 9, height = 6)
ggsave("../output/progress_change_english.png", width = 9, height = 6)

Perform a chi-square test of homogeneity to determine whether school years are significantly different.

for (y in sort(unique(progress_lockdown$year))) {
  for (l in levels(progress_lockdown$level)) {
    d <- progress_lockdown[course == "English" & year == y & level == l]
    if (nrow(d) > 0) {
      print(paste("English", y, l, collapse= " "))
      print(
        chisq.test(
          dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
        )
      )
    }
  } 
}
[1] "English Year 1 Other"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 1907.7, df = 7, p-value < 2.2e-16

[1] "English Year 1 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 57847, df = 7, p-value < 2.2e-16

[1] "English Year 1 General secondary (havo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 237876, df = 7, p-value < 2.2e-16

[1] "English Year 1 Pre-university (vwo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 47518, df = 7, p-value < 2.2e-16

[1] "English Year 2 Other"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 4115.7, df = 7, p-value < 2.2e-16

[1] "English Year 2 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 25974, df = 8, p-value < 2.2e-16

[1] "English Year 2 General secondary (havo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 15326, df = 8, p-value < 2.2e-16

[1] "English Year 2 Pre-university (vwo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 81417, df = 7, p-value < 2.2e-16

[1] "English Year 3 Other"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 16118, df = 6, p-value < 2.2e-16

[1] "English Year 3 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 19636, df = 6, p-value < 2.2e-16

[1] "English Year 3 General secondary (havo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 59552, df = 8, p-value < 2.2e-16

[1] "English Year 3 Pre-university (vwo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 41150, df = 7, p-value < 2.2e-16

[1] "English Year 4 Pre-vocational (vmbo)"

    Pearson's Chi-squared test

data:  dcast(d, school_year ~ chapter_simple, value.var = "trials",     fill = 0)[, `:=`(school_year, NULL)]
X-squared = 20200, df = 5, p-value < 2.2e-16

Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).

Make a combination plot for in the paper:

progress_lockdown_english <- progress_lockdown[level != "Other" & school_year == "19/20" & course == "English"]
progress_lockdown_english[, level := factor(level)]
p_change_english_y1 <- ggplot(progress_lockdown_english[year == "Year 1"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_english_y2 <- ggplot(progress_lockdown_english[year == "Year 2"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_english_y3 <- ggplot(progress_lockdown_english[year == "Year 3"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_english_y4 <- ggplot(progress_lockdown_english[year == "Year 4"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

filler_plot <- qplot() + 
  theme_nothing() + 
  theme(panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

plot_grid(
          filler_plot,
          p_english_y1, filler_plot, p_change_english_y1, filler_plot,
          p_english_y2, filler_plot, p_change_english_y2, filler_plot, 
          p_english_y3, filler_plot, p_change_english_y3, filler_plot,
          p_english_y4, filler_plot, p_change_english_y4,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c(NA,
                     "Year 1", NA, NA, NA,
                     "Year 2", NA, NA, NA,
                     "Year 3", NA, NA, NA,
                     "Year 4", NA, NA),
          rel_heights = c(.1, 
                          1, -.2, .75, .1,
                          1, -.2, .75, .1,
                          1, -.2, .75, .1,
                          1, -.2, .75),
          hjust = -0.1,
          vjust = -0.1,
          scale = .95)
Warning: Removed 61 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 71 rows containing missing values (position_stack).
Warning: Removed 61 rows containing missing values (geom_col).
Warning: Removed 49 rows containing missing values (position_stack).
Warning: Removed 50 rows containing missing values (geom_col).
Warning: Removed 7 rows containing missing values (position_stack).
Warning: Removed 4 rows containing missing values (geom_col).
Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

ggsave("../output/progress_combi_alt_english.pdf", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).
ggsave("../output/progress_combi_alt_english.png", width = 9, height = 11)
Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Warning: Removed 1 rows containing missing values (geom_text).

Session info

sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=nl_NL.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=nl_NL.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=nl_NL.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] lmerTest_3.1-0    lme4_1.1-21       Matrix_1.2-18     cowplot_0.9.4    
[5] ggplot2_3.3.2     DBI_1.1.0         data.table_1.13.6

loaded via a namespace (and not attached):
 [1] tidyselect_0.2.5    xfun_0.21           purrr_0.3.2        
 [4] splines_3.6.3       lattice_0.20-41     colorspace_1.4-1   
 [7] vctrs_0.2.2         htmltools_0.3.6     viridisLite_0.3.0  
[10] yaml_2.2.0          blob_1.2.1          rlang_0.4.4        
[13] pillar_1.4.2        nloptr_1.2.1        glue_1.3.1         
[16] withr_2.3.0         bit64_0.9-7         plyr_1.8.4         
[19] lifecycle_0.1.0     stringr_1.4.0       munsell_0.5.0      
[22] gtable_0.3.0        evaluate_0.14       memoise_1.1.0      
[25] labeling_0.3        knitr_1.23          Rcpp_1.0.2         
[28] scales_1.0.0        jsonlite_1.6        bit_1.1-14         
[31] digest_0.6.19       stringi_1.4.3       dplyr_0.8.3        
[34] numDeriv_2016.8-1.1 tools_3.6.3         magrittr_1.5       
[37] tibble_2.1.3        RSQLite_2.2.0       crayon_1.3.4       
[40] tidyr_1.0.0         pkgconfig_2.0.2     MASS_7.3-51.4      
[43] ellipsis_0.3.0      ggridges_0.5.1      assertthat_0.2.1   
[46] minqa_1.2.4         rmarkdown_2.6       R6_2.4.0           
[49] boot_1.3-25         nlme_3.1-149        compiler_3.6.3     
---
title: 'SlimStampen Performance During Lockdown'
author: "Maarten van der Velde"
date: "Last updated: `r Sys.Date()`"
output:
  github_document:
    toc: yes
  html_notebook:
    smart: no
    toc: yes
    toc_float: yes
editor_options: 
  chunk_output_type: inline
---

# Setup

```{r}
library(data.table)
library(DBI)
library(ggplot2)
library(cowplot)
library(grid)
library(lme4)
library(lmerTest)

Sys.setlocale("LC_TIME", "en_US.UTF-8") # Print English date format
# Sys.setlocale("LC_TIME", "nl_NL.UTF-8") # Print Dutch date format

number_format <- scales::number_format(big.mark = ",", decimal.mark = ".") # Print English number format
# number_format <- scales::number_format(big.mark = ".", decimal.mark = ",") # Print Dutch number format

theme_paper <- theme_classic(base_size = 12) + 
  theme(axis.text = element_text(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey92"))
```

School closure and opening dates

Sources:
- https://www.rijksoverheid.nl/actueel/nieuws/2020/03/15/aanvullende-maatregelen-onderwijs-horeca-sport
- https://www.rijksoverheid.nl/actueel/nieuws/2020/05/19/onderwijs-gaat-stap-voor-stap-open
```{r}
date_schools_closed <- as.POSIXct("2020-03-16")
date_schools_opened <- as.POSIXct("2020-06-02")
```


Handle database connections
```{r}
db_connect <- function() {
  db <- dbConnect(RSQLite::SQLite(), file.path("..", "data", "noordhoff.sqlite"))
  return(db)
}

db_disconnect <- function(db) {
  dbDisconnect(db)
}
```


# Data

The database contains all SlimStampen data collected via Noordhoff's platform in three courses: *Stepping Stones* (English), *Grandes Lignes* (French), and *Neue Kontakte* (German).

Trial-level response data are stored in the `responses` table.
Book information, such as the course year, book title, and chapter, are stored in the `book_info` table.

## `responses`

| Column               | Type      | Explanation                                   |
|----------------------|-----------|-----------------------------------------------|
| date                 | int       | UNIX time stamp [s]                           |
| user_id              | chr       | unique user identifier                        |
| method               | chr       | course                                        |
| start_time           | int       | elapsed time since session start [ms]         |
| rt                   | int       | response time [ms]                            |
| duration             | int       | trial duration [ms]                           |
| fact_id              | int       | unique fact identifier (within chapter)       |
| correct              | int       | response accuracy                             |
| answer               | chr       | user's response                               |
| choices              | int       | number of answer choices (1 == open response) |
| backspace_used       | dbl       | user pressed backspace during trial           |
| backspace_used_first | dbl       | user erased first character of response       |
| study                | int       | trial was a study trial                       |
| answer_language      | chr       | language of the answer                        |
| subsession           | int       | identifies part within learning session       |
| book_info_id         | chr       | unique identifier of book information         |


## `book_info`

| Column               | Type      | Explanation                                   |
|----------------------|-----------|-----------------------------------------------|
| book_info_id         | chr       | unique identifier of book information         |
| method_group         | chr       | year and edition                              |
| book_title           | chr       | book title (incl. year, level, edition)       |
| book_type            | chr       | type of book                                  |
| chapter              | chr       | chapter number and title                      |


Preview first 10 rows
```{r}
db <- db_connect()
responses_top <- dbGetQuery(db, "SELECT * FROM responses LIMIT 10")
responses_top

book_info_top <- dbGetQuery(db, "SELECT * FROM book_info LIMIT 10")
book_info_top
db_disconnect(db)
```


# Performance

There are several measures of learning performance we can look at.
The most straight-forward of these are response accuracy and response time.

Important factors to keep in mind: question type (multiple choice or open answer) and language.
Note that we cannot distinguish between NL-X and X-X, since we only know the language of the answer.

## Response accuracy

### Whole population

```{r}
db <- db_connect()
correct <- dbGetQuery(db, 
                      "SELECT r.method AS 'method',
                      DATE(r.date + 3600, 'unixepoch') AS 'doy',
                      r.user_id AS 'user',
                      r.choices > 1 AS 'mcq',
                      r.correct AS 'correct',
                      COUNT(*) AS 'n'
                      FROM 'responses' r
                      WHERE r.study == 0
                      GROUP BY r.method,
                      DATE(r.date + 3600, 'unixepoch'),
                      r.user_id,
                      r.choices > 1,
                      r.correct"
)
setDT(correct)
db_disconnect(db)
```

Fill in missing rows (where all trials on a day were correct/incorrect):
```{r}
correct <- tidyr::complete(correct, tidyr::nesting(method, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct)
```

```{r}
correct[, mcq := as.logical(mcq)]
```

```{r}
accuracy <- correct[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(method, doy, user, mcq)]
```

Add a school year column (cutoff date: 1 August):
```{r}
accuracy[, doy_posix := as.POSIXct(doy)]
accuracy[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
```

Add sensible course names:
```{r}
accuracy[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
```

Align school years:
```{r}
accuracy[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy[school_year == "19/20", doy_posix_aligned := doy_posix]
```

Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.
```{r}
accuracy[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
```

```{r}
accuracy_by_week_and_user <- accuracy[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, user, mcq)]
accuracy_by_week <- accuracy_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, mcq)]
```


```{r}
# accuracy_by_week_and_user <- accuracy[, .(accuracy = mean(accuracy, na.rm = TRUE)), by = .(course, school_year, doy_posix_aligned_week, user, mcq)]
# accuracy_by_week <- accuracy_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
#                               accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, mcq)]
```

Add question type labels:
```{r}
accuracy_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```


Plot response accuracy by week (mean +/- 1 SE).
```{r}
p_acc <- ggplot(accuracy_by_week[(course == "English" & mcq == TRUE) | course == "French",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(. ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format(accuracy = 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper
p_acc
ggsave("../output/acc_by_question_type.pdf", width = 9, height = 3)
ggsave("../output/acc_by_question_type.png", width = 9, height = 3)
```


### By level and year

```{r}
db <- db_connect()
correct_strat <- dbGetQuery(db, 
                      "SELECT r.method AS 'method',
                      r.book_info_id AS 'book_info_id',
                      DATE(r.date + 3600, 'unixepoch') AS 'doy',
                      r.user_id AS 'user',
                      r.choices > 1 AS 'mcq',
                      r.correct AS 'correct',
                      COUNT(*) AS 'n'
                      FROM 'responses' r
                      WHERE r.study == 0
                      GROUP BY r.method,
                      r.book_info_id,
                      DATE(r.date + 3600, 'unixepoch'),
                      r.user_id,
                      r.choices > 1,
                      r.correct"
)
setDT(correct_strat)
db_disconnect(db)
```

Fill in missing rows (where all trials on a day were correct/incorrect):
```{r}
correct_strat <- tidyr::complete(correct_strat, tidyr::nesting(method, book_info_id, doy, user, mcq), correct, fill = list(n = 0))
setDT(correct_strat)
```

```{r}
correct_strat[, mcq := as.logical(mcq)]
```

Add book information:
```{r}
db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)

setDT(book_info)
```

```{r}
correct_strat[book_info[book_type == "Hoofdboek",], on  = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]
```

Add sensible course names:
```{r}
correct_strat[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
```

Add a school year column (cutoff date: 1 August):
```{r}
correct_strat[, doy_posix := as.POSIXct(doy)]
correct_strat[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
```

Simplify level names:
```{r}
# Keep all distinctions
correct_strat[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
correct_strat[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]

# Simplify to three levels
correct_strat[, level := dplyr::case_when(
  grepl( "hv", book_title) ~ "General secondary\n(havo)",
  grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
  grepl("havo", book_title) ~ "General secondary\n(havo)",
  grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
  TRUE ~ "Other")]
correct_strat[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]
```

Simplify year names:
```{r}
correct_strat[, year := dplyr::case_when(
  method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
  method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
  method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
  method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
  method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
  method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
  TRUE ~ "Other")]
```

Consolidate by day:
```{r}
accuracy_strat <- correct_strat[, .(accuracy = n[correct == 1]/sum(n), n = sum(n)), by = .(school_year, doy_posix, course, level, year, user, mcq)]
```

Align school years:
```{r}
accuracy_strat[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
accuracy_strat[school_year == "19/20", doy_posix_aligned := doy_posix]
```

Use cut.Date() to bin dates by week. Each day is assigned the date of the most recent Monday.
```{r}
accuracy_strat[, doy_posix_week := cut.POSIXt(doy_posix, "week")]
accuracy_strat[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
```

```{r}
accuracy_strat_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, year, user, mcq)]
accuracy_strat_by_week <- accuracy_strat_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, year, mcq)]
```

Add question type labels:
```{r}
accuracy_strat_by_week_and_user[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
accuracy_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

How many unique users per group?
```{r}
accuracy_strat_by_week_and_user[, .(unique_users = length(unique(user))),  by = .(course, level, year, school_year, question_type)]
```


Plot response accuracy by week (mean +/- 1 SE).
```{r}
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "French" & level != "Other",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.4, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_level_year

ggsave("../output/acc_by_question_type_french_level_year.pdf", width = 9, height = 5)
ggsave("../output/acc_by_question_type_french_level_year.png", width = 9, height = 5)
```

```{r}
p_acc_level_year <- ggplot(accuracy_strat_by_week[course == "English" & level != "Other" & question_type == "Multiple\nchoice",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line() +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.4, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_level_year

ggsave("../output/acc_by_question_type_english_level_year.pdf", width = 9, height = 5)
ggsave("../output/acc_by_question_type_english_level_year.png", width = 9, height = 5)
```

### By level

```{r}
accuracy_level_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, level, user, mcq)]

accuracy_level_by_week <- accuracy_level_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, level, mcq)]
```

Add question type labels:
```{r}
accuracy_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

How many users in each group?
```{r}
accuracy_level_by_week_and_user[, .(unique_users = length(unique(user))),  by = .(course, level, school_year, mcq)]
```


Plot response accuracy by week (mean +/- 1 SE).
```{r}
p_acc_level <- ggplot(accuracy_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.6, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_level

ggsave("../output/acc_by_question_type_level.pdf", width = 9, height = 5)
ggsave("../output/acc_by_question_type_level.png", width = 9, height = 5)
```


### By year

```{r}
accuracy_year_by_week_and_user <- accuracy_strat[, .(accuracy = sum(accuracy*n)/sum(n)), by = .(course, school_year, doy_posix_aligned_week, year, user, mcq)]

accuracy_year_by_week <- accuracy_year_by_week_and_user[, .(accuracy_mean = mean(accuracy, na.rm = TRUE),
                              accuracy_se = sd(accuracy, na.rm = TRUE)/sqrt(.N), n = .N), by = .(course, school_year, doy_posix_aligned_week, year, mcq)]
```

Add question type labels:
```{r}
accuracy_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

How many users in each group?
```{r}
accuracy_year_by_week_and_user[, .(unique_users = length(unique(user))),  by = .(course, year, school_year, mcq)]
```


Plot response accuracy by week (mean +/- 1 SE).
```{r}
p_acc_year <- ggplot(accuracy_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
            aes(x = as.POSIXct(doy_posix_aligned_week), y = accuracy_mean, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(year ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1.05, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = accuracy_mean - accuracy_se, ymax = accuracy_mean + accuracy_se, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_cartesian(ylim = c(.6, 1)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Accuracy",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_acc_year

ggsave("../output/acc_by_question_type_year.pdf", width = 9, height = 5)
ggsave("../output/acc_by_question_type_year.png", width = 9, height = 5)
```



### Regression model

Fit a mixed effects model to the daily accuracy data:
```{r}
accuracy[, period := dplyr::case_when(
  doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
  doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
  doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]

# Reorder factor levels so that intercept is pre-lockdown open answer in 19/20
accuracy[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
accuracy[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
accuracy[, mcq := factor(mcq, levels = c(FALSE, TRUE))]
```

Since we know the number of trials per day and the proportion correct (accuracy), we can use a binomial GLMM:
```{r}
if(!file.exists("../output/m_acc_fit.rds")) {
  m_acc <- glmer(accuracy ~ period*school_year*mcq + (1 | user) + (1 | course),
                 data = accuracy[(course == "English" & mcq == TRUE) | course == "French",],
                 family = "binomial", 
                 weights = n,
                 nAGQ = 0,
                 control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
  saveRDS(m_acc, "../output/m_acc_fit.rds")
} else {
  m_acc <- readRDS("../output/m_acc_fit.rds")
}

m_acc_summary <- summary(m_acc)
m_acc_summary
```

Save coefficients as a table for in the paper:
```{r}
m_acc_coef <- as.data.frame(m_acc_summary$coefficients)
setDT(m_acc_coef, keep.rownames = TRUE)
m_acc_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
                   "Period: lockdown",
                   "Period: post-lockdown",
                   "School year: 18/19",
                   "Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19",
                   "Period: post-lockdown $\\times$ School year: 18/19",
                   "Period: lockdown $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ Question type: multiple choice",
                   "School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")

# Format p-values
m_acc_coef$`Pr(>|z|)` <- format.pval(m_acc_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_acc_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_acc_coef$`Pr(>|z|)`) # Remove leading zero

cat(knitr::kable(m_acc_coef,
                 align = c("l","r", "r", "r", "r"),
                 digits = c(NA, 3, 3, 2, NA),
                 col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
                 format = "latex",
                 booktabs = TRUE,
                 escape = FALSE),
    file = "../output/m_acc_table.tex")
```

Visualise the model fit:
```{r}
acc_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
acc_fit <- cbind(acc_fit, accuracy = predict(m_acc, type = "response", re.form = NA, newdata = acc_fit))
acc_fit

ggplot(acc_fit, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
  theme_paper
```

Empirical means:
```{r}
accuracy_mean <- accuracy[(course == "English" & mcq == TRUE) | course == "French", .(accuracy = sum(accuracy * n)/sum(n)), by = .(period, school_year, mcq, user, course)][, .(accuracy = mean(accuracy), accuracy_sd = sd(accuracy)), by = .(period, school_year, mcq)]
accuracy_mean
```

```{r}
ggplot(accuracy_mean, aes(x = period, y = accuracy, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(.7, 1), labels = scales::percent_format()) +
  theme_paper
```




## Response time

```{r}
db <- db_connect()
rt <- dbGetQuery(db, 
                 "SELECT r.method AS 'method',
                  r.book_info_id AS 'book_info_id',
                  DATE(r.date + 3600, 'unixepoch') AS 'doy',
                  r.user_id AS 'user',
                  r.choices > 1 AS 'mcq',
                  r.rt AS 'rt'
                  FROM 'responses' r
                  WHERE r.study == 0
                  AND r.correct == 1
                 "
)
setDT(rt)
db_disconnect(db)
```

```{r}
doys <- rt[, .(doy = unique(doy))][, doy_posix := as.POSIXct(doy)][]
doys[, doy_posix_week := cut.POSIXt(as.POSIXct(doy), "week")]
doys[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
doys[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
doys[school_year == "19/20", doy_posix_aligned := doy_posix]
doys[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
doys[, period := dplyr::case_when(
  doy_posix_aligned >= date_schools_opened ~ "post-lockdown",
  doy_posix_aligned >= date_schools_closed & doy_posix_aligned < date_schools_opened ~ "during-lockdown",
  doy_posix_aligned < date_schools_opened ~ "pre-lockdown"
)]

# Reorder factor levels so that intercept is pre-lockdown in 19/20
doys[, period := factor(period, levels = c("pre-lockdown", "during-lockdown", "post-lockdown"))]
doys[, school_year := factor(school_year, levels = c("19/20", "18/19"))]
```

```{r}
rt <- rt[doys, on = "doy"]
```

```{r}
rt[, mcq := as.factor(as.logical(mcq))]
```

```{r}
rt[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
```

Throw out trials with negative RTs (timing errors)
```{r}
rt <- rt[rt > 0]
```



### Whole population

```{r}
rt_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, doy_posix_week)]

rt_by_week <- rt_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, doy_posix_week)]
```

Overlap the two school years:
```{r}
rt_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
```

Add question type labels:
```{r}
rt_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

```{r}
rt_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
```


Plot response time by week (mean +/- 1 SE).
```{r}
p_rt <- ggplot(rt_by_week[(course == "English" & mcq == TRUE) | course == "French",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(. ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1.7, 3.7)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt

ggsave("../output/rt_by_question_type.pdf", width = 9, height = 3)
ggsave("../output/rt_by_question_type.png", width = 9, height = 3)
```

### By level and year

Add book info:
```{r}
rt[book_info[book_type == "Hoofdboek",], on  = "book_info_id", c("method_group", "book_title") := .(i.method_group, i.book_title)]
```

Simplify level names:
```{r}
# Keep all distinctions
rt[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
rt[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]

# Simplify to three levels
rt[, level := dplyr::case_when(
  grepl( "hv", book_title) ~ "General secondary\n(havo)",
  grepl("vmbo", book_title) ~ "Pre-vocational\n(vmbo)",
  grepl("havo", book_title) ~ "General secondary\n(havo)",
  grepl("vwo", book_title) ~ "Pre-university\n(vwo)",
  TRUE ~ "Other")]
rt[, level := factor(level, levels = c("Other", "Pre-vocational\n(vmbo)", "General secondary\n(havo)", "Pre-university\n(vwo)"))]
```

Simplify year names:
```{r}
rt[, year := dplyr::case_when(
  method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
  method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
  method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
  method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
  method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
  method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
  TRUE ~ "Other")]
```


```{r}
rt_strat_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, year, doy_posix_week)]

rt_strat_by_week <- rt_strat_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, year, doy_posix_week)]
```

Overlap the two school years:
```{r}
rt_strat_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_strat_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
```

Add question type labels:
```{r}
rt_strat_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

```{r}
rt_strat_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
```


Plot response time by week (mean +/- 1 SE).
```{r}
p_rt_level_year <- ggplot(rt_strat_by_week[course == "French",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 4)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_level_year

ggsave("../output/rt_by_question_type_french_level_year.pdf", width = 9, height = 3)
ggsave("../output/rt_by_question_type_french_level_year.png", width = 9, height = 3)
```

```{r}
p_rt_level_year <- ggplot(rt_strat_by_week[course == "English" & question_type == "Multiple\nchoice" & level != "Other",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ year) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 4)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_level_year

ggsave("../output/rt_by_question_type_english_level_year.pdf", width = 9, height = 3)
ggsave("../output/rt_by_question_type_english_level_year.png", width = 9, height = 3)
```

### By level

```{r}
rt_level_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, level, doy_posix_week)]

rt_level_by_week <- rt_level_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, level, doy_posix_week)]
```

Overlap the two school years:
```{r}
rt_level_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_level_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
```

Add question type labels:
```{r}
rt_level_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

```{r}
rt_level_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
```


Plot response time by week (mean +/- 1 SE).
```{r}
p_rt_level <- ggplot(rt_level_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & level != "Other",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(level ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 6)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_level

ggsave("../output/rt_by_question_type_level.pdf", width = 9, height = 5)
ggsave("../output/rt_by_question_type_level.png", width = 9, height = 5)
```

### By year

```{r}
rt_year_med <- rt[, .(rt_median = median(rt)), by = .(school_year, mcq, user, course, year, doy_posix_week)]

rt_year_by_week <- rt_year_med[, .(rt = mean(rt_median), rt_se = sd(rt_median)/sqrt(.N)), by = .(school_year, mcq, course, year, doy_posix_week)]
```

Overlap the two school years:
```{r}
rt_year_by_week[school_year == "18/19", doy_posix_week_aligned := as.POSIXct(as.POSIXct(doy_posix_week) + 365*24*60*60, origin = "1970-01-01")]
rt_year_by_week[school_year == "19/20", doy_posix_week_aligned := as.POSIXct(doy_posix_week)]
```

Add question type labels:
```{r}
rt_year_by_week[, question_type := ifelse(mcq == TRUE, "Multiple\nchoice", "Open\nanswer")]
```

```{r}
rt_year_by_week[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
```


Plot response time by week (mean +/- 1 SE).
```{r}
p_rt_year <- ggplot(rt_year_by_week[((course == "English" & question_type == "Multiple\nchoice") | course == "French") & year != "Other",],
            aes(x = doy_posix_week_aligned, y = rt/1e3, group = interaction(school_year, question_type), colour = school_year, fill = school_year)) +
  facet_grid(year ~ course) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = 0, ymax = 1000, fill = "grey92", colour = "grey50", lty = 2, alpha = .9) +
  geom_ribbon(aes(ymin = rt/1e3 - rt_se/1e3, ymax = rt/1e3 + rt_se/1e3, colour = NULL), alpha = 0.2) +
  geom_line(aes(lty = question_type)) +
  scale_x_datetime(expand = c(0, 0),
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::unit_format(unit = "s", accuracy = .1)) +
  coord_cartesian(ylim = c(1, 4)) +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  scale_fill_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Response time",
       colour = "School year",
       fill = "School year",
       lty = "Question type") +
  guides(colour = guide_legend(order = 1),
         fill = guide_legend(order = 1),
         lty = guide_legend(order = 2)) +
  theme_paper

p_rt_year

ggsave("../output/rt_by_question_type_year.pdf", width = 9, height = 5)
ggsave("../output/rt_by_question_type_year.png", width = 9, height = 5)
```


### Regression model

```{r}
rt_model_dat <- rt[, .(rt_median = median(rt)), by = .(course, school_year, period, doy_posix, mcq, user)]
```

Fit a generalised linear mixed effects model (assuming a Gamma distribution for RT and an identity link function; Lo & Andrew, 2015) to the daily median RT:
```{r}
if(!file.exists("../output/m_rt_fit.rds")) {
  m_rt <- glmer(rt_median ~ period*school_year*mcq + (1 | user) + (1 | course),
                 data = rt_model_dat[(course == "English" & mcq == TRUE) | course == "French",],
                 family = Gamma(link = "identity"),
                 nAGQ = 0,
                 control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e6)))
  saveRDS(m_rt, "../output/m_rt_fit.rds")
} else {
  m_rt <- readRDS("../output/m_rt_fit.rds")
}

m_rt_summary <- summary(m_rt)
m_rt_summary
```

Save coefficients as a table for in the paper:
```{r}
m_rt_coef <- as.data.frame(m_rt_summary$coefficients)
setDT(m_rt_coef, keep.rownames = TRUE)
m_rt_coef$rn <- c("Intercept \\small{(Period: pre-lockdown, School year: 19/20, Question type: open answer)}",
                   "Period: lockdown",
                   "Period: post-lockdown",
                   "School year: 18/19",
                   "Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19",
                   "Period: post-lockdown $\\times$ School year: 18/19",
                   "Period: lockdown $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ Question type: multiple choice",
                   "School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice",
                   "Period: post-lockdown $\\times$ School year: 18/19 $\\times$ Question type: multiple choice")

# Format p-values
m_rt_coef$`Pr(>|z|)` <- format.pval(m_rt_coef$`Pr(>|z|)`, eps = .001, digits = 3, flag = "0")
m_rt_coef$`Pr(>|z|)` <- sub('^(<)?0[.]', '\\1.', m_rt_coef$`Pr(>|z|)`) # Remove leading zero

cat(knitr::kable(m_rt_coef,
                 align = c("l","r", "r", "r", "r"),
                 digits = c(NA, 3, 3, 2, NA),
                 col.names = c("Effect", "$b$", "SE", "$z$", "$p$"),
                 format = "latex",
                 booktabs = TRUE,
                 escape = FALSE),
    file = "../output/m_rt_table.tex")
```

Visualise the model fit:
```{r}
rt_fit <- expand.grid(period = c("pre-lockdown", "during-lockdown", "post-lockdown"), school_year = c("18/19", "19/20"), mcq = c(TRUE, FALSE))
rt_fit <- cbind(rt_fit, rt = predict(m_rt, type = "response", re.form = NA, newdata = rt_fit))
rt_fit

ggplot(rt_fit, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(1500, 4000)) +
  theme_paper
```

Empirical means:
```{r}
rt_mean <- rt_model_dat[(course == "English" & mcq == TRUE) | course == "French", .(rt = mean(rt_median)), by = .(period, school_year, mcq, user, course)][, .(rt = mean(rt), rt_sd = sd(rt)), by = .(period, school_year, mcq, course)]
rt_mean[, school_year := factor(school_year, levels = c("18/19", "19/20"))]
rt_mean
```

```{r}
ggplot(rt_mean, aes(x = period, y = rt, colour = school_year, lty = mcq, group = interaction(mcq, school_year))) +
  facet_grid(~ course) +
  geom_line() +
  geom_point() +
  scale_y_continuous(limits = c(1500, 3000)) +
  theme_paper
```


## Combination plot

```{r}
p_legend <- get_legend(p_acc)

p_acc <- p_acc +
  guides(colour = FALSE, fill = FALSE, lty = FALSE)

p_rt <- p_rt +
  guides(colour = FALSE, fill = FALSE, lty = FALSE)
```

Combine plots:
```{r}
plot_grid(
  plot_grid(p_acc, p_rt,
            ncol = 1,
            labels = c("A", "B")),
  p_legend,
  rel_widths = c(1, .2)
)

ggsave("../output/combi_acc_rt.pdf", width = 9, height = 3.5)
ggsave("../output/combi_acc_rt.png", width = 9, height = 3.5)
```



# Learning progress

Get the unique book chapter IDs on each day:
```{r}
db <- db_connect()

progress <- dbGetQuery(db,
                       "SELECT DISTINCT r.book_info_id AS 'book_info_id',
                        r.method AS 'method',
                        DATE(r.date + 3600, 'unixepoch') AS 'doy',
                        COUNT(*) AS 'trials'
                        FROM 'responses' r
                        GROUP BY r.method,
                        r.book_info_id,
                        DATE(r.date + 3600, 'unixepoch');"
                       )

db_disconnect(db)

setDT(progress)
```

Join with the book chapter information:
```{r}
db <- db_connect()
book_info <- dbGetQuery(db, "SELECT DISTINCT * FROM 'book_info'")
db_disconnect(db)

setDT(book_info)
```

```{r}
progress[book_info[book_type == "Hoofdboek",], on  = "book_info_id", c("method_group", "book_title", "chapter") := .(i.method_group, i.book_title, i.chapter)]
```

Add sensible course names:
```{r}
progress[, course := ifelse(method == "Grandes Lignes", "French", ifelse(method == "Stepping Stones", "English", "German"))]
```

Add a school year column (cutoff date: 1 August):
```{r}
progress[, doy_posix := as.POSIXct(doy)]
progress[, school_year := ifelse(doy_posix < "2019-08-01", "18/19", "19/20")]
```

Consolidate count by day and chapter:
```{r}
progress_by_day <- progress[, .(trials = sum(trials)), by = .(school_year, doy_posix, course, method_group, book_title, chapter)]
```


Simplify level names:
```{r}
# Keep all distinctions
progress_by_day[, book_title_simple := stringr::str_sub(book_title, 3, -10)]
progress_by_day[, book_title_simple := factor(book_title_simple, levels = c("vmbo b/lwoo", "vmbo b", "vmbo bk", "vmbo k", "vmbo kgt", "vmbo-gt", "vmbo gt", "vmbo-gt/havo", "vmbo (t)hv", "havo", "havo vwo", "vwo"))]

# Simplify to three levels
progress_by_day[, level := dplyr::case_when(
  grepl( "hv", book_title) ~ "General secondary (havo)",
  grepl("vmbo", book_title) ~ "Pre-vocational (vmbo)",
  grepl("havo", book_title) ~ "General secondary (havo)",
  grepl("vwo", book_title) ~ "Pre-university (vwo)",
  TRUE ~ "Other")]
progress_by_day[, level := factor(level, levels = c("Other", "Pre-vocational (vmbo)", "General secondary (havo)", "Pre-university (vwo)"))]
```

Simplify year names:
```{r}
progress_by_day[, year := dplyr::case_when(
  method_group == "Leerjaar 1 (5e Ed.)" ~ "Year 1",
  method_group == "Leerjaar 2 (5e Ed.)" ~ "Year 2",
  method_group == "Leerjaar 3 (5e Ed.)" ~ "Year 3",
  method_group == "Leerjaar 3/4 (5e Ed.)" ~ "Year 3/4",
  method_group == "Leerjaar 4 (5e Ed.)" ~ "Year 4",
  method_group == "Tweede Fase (6e Ed.)" ~ "Tweede Fase",
  TRUE ~ "Other")]
```


Simplify chapter names:
```{r}
# In most cases, the chapter name starts with a number
progress_by_day[, chapter_simple := factor(as.numeric(stringr::str_extract(chapter, "^\\d{1,2}")))]

# Remaining cases:
unique(progress_by_day[is.na(chapter_simple),]$chapter)

# Combine these chapters into an "other" category
progress_by_day[is.na(chapter_simple), chapter_simple := "O"]
```


Align school years:
```{r}
progress_by_day[school_year == "18/19", doy_posix_aligned := as.POSIXct(doy_posix + 365*24*60*60, origin = "1970-01-01")]
progress_by_day[school_year == "19/20", doy_posix_aligned := doy_posix]
```

Use cut.Date() to bin dates by week and month. Each day is assigned the date of the most recent Monday.
```{r}
progress_by_day[, doy_posix_aligned_week := cut.POSIXt(doy_posix_aligned, "week")]
progress_by_day[, doy_posix_aligned_month := cut.POSIXt(doy_posix_aligned, "month")]
```

Calculate proportions by week and month:
```{r}
progress_by_week <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_week, course, level, year, chapter_simple)]
progress_by_week[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_week, course, level, year)]

progress_by_month <- progress_by_day[, .(trials = sum(trials)), by = .(school_year, doy_posix_aligned_month, course, level, year, chapter_simple)]
progress_by_month[, prop := trials/sum(trials), by = .(school_year, doy_posix_aligned_month, course, level, year)]
```

```{r}
setorder(progress_by_week, chapter_simple)
setorder(progress_by_month, chapter_simple)
```


### French
```{r}
p_french_y1 <- ggplot(progress_by_week[course == "French" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_french_y2 <- ggplot(progress_by_week[course == "French" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_french_y3 <- ggplot(progress_by_week[course == "French" & year == "Year 3/4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_progress_french <- plot_grid(p_french_y1, p_french_y2, p_french_y3, 
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c("Year 1", "Year 2", "Year 3/4"),
          hjust = -0.1,
          scale = .95)

p_progress_french

ggsave("../output/progress_french.pdf", width = 9, height = 9)
ggsave("../output/progress_french.png", width = 9, height = 6)
```


Did the share of trials change between school years?
We can simplify the analysis by aggregating over the whole lockdown period.
```{r}
progress_lockdown <- progress_by_day[between(doy_posix_aligned, date_schools_closed, date_schools_opened), .(trials = sum(trials)), by = .(school_year, course, level, year, chapter_simple)]

# Fill in missing rows (occurs when chapter was only studied in one of the two years)
progress_lockdown <- as.data.table(tidyr::complete(progress_lockdown, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0))) 
  
progress_lockdown[, prop := trials/sum(trials), by = .(school_year, course, level, year)]

setorder(progress_lockdown, chapter_simple)
```

```{r}
ggplot(progress_lockdown[course == "French"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(level ~ year) +
  geom_col(colour = NA) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter",
       title = "French") +
  theme_paper
```

Perform a chi-square test of homogeneity to determine whether school years are significantly different.

```{r}
for (y in sort(unique(progress_lockdown$year))) {
  for (l in levels(progress_lockdown$level)) {
    d <- progress_lockdown[course == "French" & year == y & level == l]
    if (nrow(d) > 0) {
      print(paste("French", y, l, collapse= " "))
      print(
        chisq.test(
          dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
        )
      )
    }
  } 
}
```

Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).

Visualise the change between school years:
```{r}
progress_lockdown[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]

ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(x = chapter_simple, y = (prop_change * 100), colour = chapter_simple, group = school_year)) +
  facet_grid(level ~ year, scales = "free_x") +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(xend = chapter_simple), yend = 0) +
  geom_point() +
  scale_y_continuous(limits = c(-25, 25)) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = "Chapter",
       y = "Change in trial share\n(percentage points)",
       title = "French") +
  theme_paper
```

Are these changes really important? We may expect a certain amount of fluctuation between any pair of school years. We don't have data from before the 18/19 school year, but we can look at how the magnitude of changes during the lockdown period compares to changes earlier in the school year.

To keep things as comparable as possible, use a sliding time window with the same size as the lockdown period:
```{r}
time_window <- as.numeric(round(date_schools_opened - date_schools_closed))
time_window
```

```{r}
date_range <- sort(unique(progress_by_day$doy_posix_aligned))
date_range <- date_range[date_range < date_schools_closed]

prop_change_window <- data.table()

for (i in 1:(length(date_range) - as.numeric(time_window))) {
  d <- date_range[i:(i + time_window - 1)]
  progress_window <- progress_by_day[course %in% c("French", "English") & doy_posix_aligned %in% d,
                                     .(trials = sum(trials)),
                                     by = .(school_year, course, level, year, chapter_simple)]
  
  # Fill in missing rows (occurs when chapter was only studied in one of the two years)
  progress_window <- as.data.table(tidyr::complete(progress_window, tidyr::nesting(course, level, year, chapter_simple), school_year, fill = list(trials = 0))) 
  
  progress_window[, prop := trials/sum(trials), by = .(school_year, course, level, year)]

  progress_window[, prop_change := prop[school_year == "19/20"] - prop[school_year == "18/19"], by = .(course, level, year, chapter_simple)]
  
  prop_change_window <- rbind(prop_change_window, progress_window[school_year == "19/20",][,window := i][,date_min := min(d)][,date_max := max(d)])
}
```


The density of year-to-year changes can be visualised by time window:
```{r}
ggplot(prop_change_window, aes(x = prop_change * 100, y = window, group = window)) +
  ggridges::geom_density_ridges(alpha = 0.1, scale = 25, fill = NA) +
  labs(x = "Change in trial share\n(percentage points)",
         y = "Time window") +
  theme_paper
```

Compare the aggregated density to the changes during the lockdown period:
```{r}
prop_change_combined <- rbind(prop_change_window[, period := "Pre-lockdown"], progress_lockdown[course %in% c("French", "English") & school_year == "19/20", period := "Lockdown"], fill = TRUE)
prop_change_combined[, period := factor(period, levels = c("Pre-lockdown", "Lockdown"))]

ggplot(prop_change_combined, aes(x = prop_change, colour = period)) +
  geom_density() +
  scale_colour_viridis_d(end = .5, direction = -1, na.translate = FALSE) +
  labs(x = "Change in trial share\n(percentage points)",
       y = "Density",
       colour = NULL) +
  theme_paper
```

```{r}
prop_change_sd <- prop_change_window[, .(sd = sd(prop_change) * 100), by = .(course, year, level)]
```


Add boundaries based on the typical spread to the change plot:
```{r}
p_change_french <- ggplot(progress_lockdown[school_year == "19/20" & course == "French"], aes(colour = chapter_simple)) +
  facet_grid(year ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
  scale_y_continuous(breaks = c(-20, 0, 20)) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = "Chapter",
       y = "Change in trial share\n(percentage points)") +
  theme_paper +
  theme(panel.grid.major.y = element_blank())

p_change_french

ggsave("../output/progress_change_french.pdf", width = 5, height = 4)
ggsave("../output/progress_change_french.png", width = 9, height = 3)
```

Make a combination plot for in the paper:
```{r}
plot_grid(p_french_y1, p_french_y2, p_french_y3, p_change_french,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c("Year 1", "Year 2", "Year 3/4", "Change"),
          rel_heights = c(1, 1, 1, 1.5),
          hjust = -0.1,
          scale = .95)

ggsave("../output/progress_combi_french.pdf", width = 9, height = 9)
ggsave("../output/progress_combi_french.png", width = 9, height = 9)
```


```{r}
p_change_french_y1 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 1"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 1"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-20, 0, 20), limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_french_y2 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 2"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 2"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-20, 0, 20),  limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_french_y3 <- ggplot(progress_lockdown[school_year == "19/20" & course == "French" & year == "Year 3/4"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "French" & year == "Year 3/4"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-20, 0, 20),  limits = c(-30, 30), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

filler_plot <- qplot() + 
  theme_nothing() + 
  theme(panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

plot_grid(
          filler_plot,
          p_french_y1, filler_plot, p_change_french_y1, filler_plot,
          p_french_y2, filler_plot, p_change_french_y2, filler_plot, 
          p_french_y3, filler_plot, p_change_french_y3,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c(NA,
                     "Year 1", NA, NA, NA,
                     "Year 2", NA, NA, NA,
                     "Year 3/4", NA, NA),
          rel_heights = c(.1, 
                          1, -.2, .75, .1,
                          1, -.2, .75, .1,
                          1, -.2, .75),
          hjust = -0.1,
          vjust = -0.1,
          scale = .95)

ggsave("../output/progress_combi_alt_french.pdf", width = 9, height = 9)
ggsave("../output/progress_combi_alt_french.png", width = 9, height = 9)
```


### English

NOTE: chapters without a number (Bridging the Gap, Exam Preparation) are shown as "O" in the plot.
They don't seem to fit neatly in the chapter sequence, so I'm grouping them together.
```{r}
p_english_y1 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 1"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_english_y2 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 2"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_english_y3 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 3"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_english_y4 <- ggplot(progress_by_week[level != "Other"][, level := factor(level)][course == "English" & year == "Year 4"], aes(x = as.POSIXct(doy_posix_aligned_week), y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(school_year ~ level, drop = FALSE) +
  geom_col(alpha = 0.75, width = 7*24*60*60, colour = NA) +
  geom_rect(xmin = date_schools_closed, xmax = date_schools_opened, ymin = -0.01, ymax = 1.01, fill = NA, colour = "black", lty = 2) +
  scale_x_datetime(expand = c(0, 0), 
                   breaks = as.POSIXct(c(
                     "2019-10-01 02:00:00 CET",
                     "2019-12-01 02:00:00 CET",
                     "2020-02-01 02:00:00 CET",
                     "2020-04-01 02:00:00 CET",
                     "2020-06-01 02:00:00 CET")),
                   limits = as.POSIXct(c("2019-09-01 02:00:00 CET", "2020-07-01 02:00:00 CET")),
                   date_labels = "%b") +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1), breaks = c(0, .5 , 1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  guides(fill = guide_legend(ncol = 2)) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter") +
  theme_paper

p_progress_english <- plot_grid(p_english_y1, p_english_y2, p_english_y3, p_english_y4,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c("Year 1", "Year 2", "Year 3", "Year 4"),
          hjust = -0.1,
          scale = .95)

p_progress_english

ggsave("../output/progress_english.pdf", width = 9, height = 9)
ggsave("../output/progress_english.png", width = 9, height = 9)
```

Did the share of trials change between school years?

```{r}
ggplot(progress_lockdown[course == "English" & level != "Other"], aes(x = school_year, y = prop, fill = chapter_simple, group = school_year)) +
  facet_grid(level ~ year) +
  geom_col(colour = NA) +
  scale_y_continuous(labels = scales::percent_format(), limits = c(0,1)) +
  scale_fill_viridis_d(direction = -1, na.translate = FALSE) +
  labs(x = NULL,
       y = "Share of trials",
       fill = "Chapter",
       title = "English") +
  theme_paper
```

Change between school years:
```{r}
p_change_english <- ggplot(progress_lockdown[school_year == "19/20" & course == "English" & level != "Other"], aes(colour = chapter_simple)) +
  facet_grid(year ~ level, scales = "free_x") +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other"], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100))) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = "Chapter",
       y = "Change in trial share\n(percentage points)") +
  theme_paper +
  theme(panel.grid.major.y = element_blank())

p_change_english 

ggsave("../output/progress_change_english.pdf", width = 9, height = 6)
ggsave("../output/progress_change_english.png", width = 9, height = 6)
```

Perform a chi-square test of homogeneity to determine whether school years are significantly different.

```{r}
for (y in sort(unique(progress_lockdown$year))) {
  for (l in levels(progress_lockdown$level)) {
    d <- progress_lockdown[course == "English" & year == y & level == l]
    if (nrow(d) > 0) {
      print(paste("English", y, l, collapse= " "))
      print(
        chisq.test(
          dcast(d, school_year ~ chapter_simple, value.var = "trials", fill = 0)[, school_year := NULL]
        )
      )
    }
  } 
}
```

Conclusion: all tests indicate a difference in proportions between school years (p << 0.001).


Make a combination plot for in the paper:
```{r}
progress_lockdown_english <- progress_lockdown[level != "Other" & school_year == "19/20" & course == "English"]
progress_lockdown_english[, level := factor(level)]

p_change_english_y1 <- ggplot(progress_lockdown_english[year == "Year 1"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 1"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_english_y2 <- ggplot(progress_lockdown_english[year == "Year 2"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 2"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_english_y3 <- ggplot(progress_lockdown_english[year == "Year 3"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 3"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

p_change_english_y4 <- ggplot(progress_lockdown_english[year == "Year 4"], aes(colour = chapter_simple)) +
  facet_grid(. ~ level, scales = "free_x", drop = FALSE) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -2*sd, ymax = 2*sd), xmin = 0, xmax = 1000, fill = "grey90", colour = NA) +
  geom_rect(data = prop_change_sd[course == "English" & level != "Other" & year == "Year 4"][,level := factor(level)], aes(ymin = -sd, ymax = sd), xmin = 0, xmax = 100, fill = "grey75", colour = NA) +
  geom_hline(yintercept = 0, lty = 2) +
  geom_segment(aes(x = chapter_simple, xend = chapter_simple, y = (prop_change * 100)), yend = 0, alpha = .75) +
  geom_point(aes(x = chapter_simple, y = (prop_change * 100)), alpha = .75) +
  scale_y_continuous(breaks = c(-10, 0, 10), limits = c(-20, 20), labels = scales::number_format(suffix = " pp")) +
  scale_colour_viridis_d(direction = -1, na.translate = FALSE) +
  guides(colour = FALSE) +
  labs(x = NULL,
       y = "Change") +
  theme_paper +
  theme(panel.grid.major.y = element_blank(),
        strip.background = element_blank(),
        strip.text.x = element_blank(),
        panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

filler_plot <- qplot() + 
  theme_nothing() + 
  theme(panel.background = element_rect(fill = "transparent", colour = NA),
        plot.background = element_rect(fill = "transparent", colour = NA))

plot_grid(
          filler_plot,
          p_english_y1, filler_plot, p_change_english_y1, filler_plot,
          p_english_y2, filler_plot, p_change_english_y2, filler_plot, 
          p_english_y3, filler_plot, p_change_english_y3, filler_plot,
          p_english_y4, filler_plot, p_change_english_y4,
          ncol = 1,
          align = "hv", axis = "tblr",
          labels = c(NA,
                     "Year 1", NA, NA, NA,
                     "Year 2", NA, NA, NA,
                     "Year 3", NA, NA, NA,
                     "Year 4", NA, NA),
          rel_heights = c(.1, 
                          1, -.2, .75, .1,
                          1, -.2, .75, .1,
                          1, -.2, .75, .1,
                          1, -.2, .75),
          hjust = -0.1,
          vjust = -0.1,
          scale = .95)

ggsave("../output/progress_combi_alt_english.pdf", width = 9, height = 11)
ggsave("../output/progress_combi_alt_english.png", width = 9, height = 11)
```

# Session info
```{r}
sessionInfo()
```

